Install required packages and load libraries

# install.packages('Seurat')
# remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
# install.packages("devtools")
# BiocManager::install("SingleCellExperiment")
# devtools::install_github("immunogenomics/harmony")

library(Seurat)
library(ggplot2)
library(dplyr)
library(DoubletFinder)
library(cowplot)
library(SingleCellExperiment)
library(harmony)

I have a single cell gene expression data for two samples: skin cancer Melanoma cell lines A375 and A375_treated. The pre-processing of the data is described in my earlier tutorial using 10X’s cellranger pipeline at: https://github.com/githubrudramani/Pipelines/blob/main/sc-RNAseq/sc-RNAseq%20cellrnager.pdf
Let’s load the data from directory

setwd("/Users/rudramanipokhrel/work/Pipelines/sc_Rnaseq/Melanoma")
# Clear memory
rm(list = ls())

# Read 10X data and create Seurat object
# load  A375 cell line data
data1 <- Read10X(data.dir = "A375_count/outs/filtered_feature_bc_matrix/")
A375 <- CreateSeuratObject(counts = data1, project = "A375", min.cells = 3, min.features = 200)

data2 <- Read10X(data.dir = "A375_treated_count/outs/filtered_feature_bc_matrix/")
A375_treated <- CreateSeuratObject(counts = data2, project = "A375_treated", min.cells = 3, min.features = 200)
dim(A375)
## [1] 21542 10870
dim(A375_treated)
## [1] 21801  9256
# Note min.cells is recommned 0.1% of total cells in the data. In this case the data has ~10000 cells.
# For now this parameters works. We will filter them further in upcoming steps. 

QC and filtering the data

Tutorial link:
http://barc.wi.mit.edu/education/hot_topics/scRNAseq_2020/SingleCell_Seurat_2020.html

First look at A375 sample

count1 <- A375@assays$RNA@counts
counts_per_cell1 <- Matrix::colSums(count1)
counts_per_gene1 <- Matrix::rowSums(count1)
genes_per_cell1 <- Matrix::colSums(count1>0) # count a gene only if it has non-zero reads mapped.
cells_per_gene1 <- Matrix::rowSums(count1>0) # only count cells where the gene is expressed

#Plot cells ranked by their number of detected genes.

plot(sort(genes_per_cell1), xlab='cell', log='y', main='genes per cell (ordered)')

# plot minimum and maximum cut-off

abline(h=350, col='blue')  # lower threshold
abline(h=5000, col='red') # upper threshold

We can remove the cell having less than 350 genes and more than 5000 genes. Extremely high number of detected genes could indicate doublets. However, depending on the cell type composition in your sample, you may have cells with higher number of genes (and also higher counts) from one cell type. In this case, we will run doublet prediction further down

Examine percentage of mitochondrial read content
In a cell, the % of read from mitochondrial genome is indicative of quality of cells. High fraction of mitochondrial content might be due to damaged cells during tissue isolation, because if cytoplasmic mRNA has leaked out through a broken membrane, only mRNA located in the mitochondria is still conserved. However specific biology of interest may be attributed to the high mitochondrial counts. In general it is wise to remove cells having high mitochondrial contents. get mitocondrial genes in count matrix

mito_genes1 <- grep("^mt-", rownames(count1) , ignore.case=T, value=T)

# compute mitochondrial percentages

mito_gene_counts1 = Matrix::colSums(count1[mito_genes1,])
pct_mito1 = mito_gene_counts1 / counts_per_cell1 * 100
plot(sort(pct_mito1), xlab = "cells sorted by percentage mitochondrial counts", ylab = "percentage mitochondrial counts")
cutoff <- median(pct_mito1) + 2 *sd(pct_mito1)
cutoff
## [1] 34.04232
abline(h=cutoff, col='orange')

# we can remove the cell mt content more than cutoff

Implementing inbuilt seurat packages to do qc and filtering:

A375[["percent.mt"]] <- PercentageFeatureSet(object = A375, pattern = "^MT-") 

# Make violin plot
VlnPlot(A375, c("nCount_RNA", "nFeature_RNA", "percent.mt"), pt.size = 0.1)

# Apply the filter:
A375_filt <- subset(A375, subset = 
                           nCount_RNA < 20000 & 
                           nFeature_RNA > 350 & 
                           percent.mt < cutoff)

VlnPlot(A375_filt, c("nCount_RNA", "nFeature_RNA", "percent.mt"), pt.size = 0.1)

Filtering the doublets

Doublets/Mulitples of cells in the same well/droplet is a common issue in scRNAseq protocols. Especially in droplet-based methods whith overloading of cells. In a typical 10x experiment the proportion of doublets is linearly dependent on the amount of loaded cells. As indicated from the Chromium user guide: https://assets.ctfassets.net/an68im79xiti/1eX2FPdpeCgnCJtw4fj9Hx/7cb84edaa9eca04b607f9193162994de/CG000204_ChromiumNextGEMSingleCell3_v3.1_Rev_D.pdf
For my data I am using doublet rates for ~10000 recovered cell as 7.6%. Follow this tutorial for more detail:
https://nbisweden.github.io/workshop-scRNAseq/labs/compiled/seurat/seurat_01_qc.html#Predict_doublets

A375_filt = NormalizeData(A375_filt)
A375_filt = FindVariableFeatures(A375_filt,selection.method = "vst", nfeatures = 2500, verbose = F)
all.genes <- rownames(A375_filt)
A375_filt = ScaleData(A375_filt,features = all.genes, verbose = F)
A375_filt = RunPCA(A375_filt, verbose = F, npcs = 20)
A375_filt = RunUMAP(A375_filt, dims = 1:10, verbose = F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
nExp <- round(ncol(A375_filt) * 0.076)  # expect 7.6% doublets
A375_filt <- doubletFinder_v3(A375_filt, pN = 0.25, pK = 0.09, nExp = nExp, PCs = 1:10)
## [1] "Creating 3366 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
# Name of the DF prediction can change, so extract the correct column name.
DF.name = colnames(A375_filt@meta.data)[grepl("DF.classification", colnames(A375_filt@meta.data))]
DimPlot(A375_filt) + NoAxes()

cowplot::plot_grid(ncol = 2, DimPlot(A375_filt, group.by = "orig.ident") + NoAxes(), 
                   DimPlot(A375_filt, group.by = DF.name) + NoAxes())

VlnPlot(A375_filt, features = "nFeature_RNA", group.by = DF.name, pt.size = 0.1)

dim(A375_filt)
## [1] 21542 10099
A375_filt = A375_filt[, A375_filt@meta.data[, DF.name] == "Singlet"]
dim(A375_filt)
## [1] 21542  9331
VlnPlot(A375_filt, features = "nFeature_RNA", pt.size = 0.1)

Apply QC and filter for A375_treated sample

count2 <- A375_treated@assays$RNA@counts
counts_per_cell2 <- Matrix::colSums(count2)
counts_per_gene2 <- Matrix::rowSums(count2)
genes_per_cell2 <- Matrix::colSums(count2>0) # count a gene only if it has non-zero reads mapped.
cells_per_gene2 <- Matrix::rowSums(count2>0) # only count cells where the gene is expressed

#Plot cells ranked by their number of detected genes.
plot(sort(genes_per_cell2), xlab='cell', log='y', main='genes per cell (ordered)')

# plot minimum and maximum cut-off
abline(h=350, col='blue')  # lower threshold
abline(h=5000, col='red') # upper threshold

#Examine percent mitochondrial read content.
mito_genes2 <- grep("^mt-", rownames(count2) , ignore.case=T, value=T)
mito_genes2
##  [1] "MT-ND1"  "MT-ND2"  "MT-CO1"  "MT-CO2"  "MT-ATP8" "MT-ATP6" "MT-CO3" 
##  [8] "MT-ND3"  "MT-ND4L" "MT-ND4"  "MT-ND5"  "MT-ND6"  "MT-CYB"
# compute mitochondrial percentages
mito_gene_counts2 = Matrix::colSums(count2[mito_genes2,])
pct_mito2 = mito_gene_counts2 / counts_per_cell2 * 100
plot(sort(pct_mito2), xlab = "cells sorted by percentage mitochondrial counts", ylab = "percentage mitochondrial counts")
cutoff2 <- median(pct_mito2) + 2 *sd(pct_mito2)
abline(h=cutoff2, col='orange')

# Implementing inbuilt seurat packages to to qc and filtering:
A375_treated[["percent.mt"]] <- PercentageFeatureSet(object = A375_treated, pattern = "^MT-") 

# Make violin plot
VlnPlot(A375_treated, c("nCount_RNA", "nFeature_RNA", "percent.mt"), pt.size = 0.1)

# Apply the filter:
A375_treated_filt <- subset(A375_treated, subset = 
                      nCount_RNA < 20000 & 
                      nFeature_RNA > 350 & 
                      percent.mt < cutoff)

VlnPlot(A375_treated_filt, c("nCount_RNA", "nFeature_RNA", "percent.mt"), pt.size = 0.1)

## ----------------------------------------------------------------------------------------
# Filtering the doublets
## ----------------------------------------------------------------------------------------

A375_treated_filt = NormalizeData(A375_treated_filt)
A375_treated_filt = FindVariableFeatures(A375_treated_filt,  selection.method = "vst", nfeatures = 2500, verbose = F)
all.genes = rownames(A375_treated_filt)
A375_treated_filt = ScaleData(A375_treated_filt, features = all.genes, verbose = F)
A375_treated_filt = RunPCA(A375_treated_filt, verbose = F, npcs = 20)
A375_treated_filt = RunUMAP(A375_treated_filt, dims = 1:10, verbose = F)

nExp <- round(ncol(A375_treated_filt) * 0.076)  # expect 7.6% doublets
A375_treated_filt <- doubletFinder_v3(A375_treated_filt, pN = 0.25, pK = 0.09, nExp = nExp, PCs = 1:10)
## [1] "Creating 2924 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DF.name = colnames(A375_treated_filt@meta.data)[grepl("DF.classification", colnames(A375_treated_filt@meta.data))]
DimPlot(A375_treated_filt) + NoAxes()

cowplot::plot_grid(ncol = 2, DimPlot(A375_treated_filt, group.by = "orig.ident") + NoAxes(), 
                   DimPlot(A375_treated_filt, group.by = DF.name) + NoAxes())

VlnPlot(A375_treated_filt, features = "nFeature_RNA", group.by = DF.name, pt.size = 0.1)

A375_treated_filt = A375_treated_filt[,A375_treated_filt@meta.data[, DF.name] == "Singlet"]
VlnPlot(A375_treated_filt, features = "nFeature_RNA", pt.size = 0.1)

Let’s see how many cells we filtered

dim(A375)
## [1] 21542 10870
dim(A375_treated)
## [1] 21801  9256
dim(A375_filt)
## [1] 21542  9331
dim(A375_treated_filt)
## [1] 21801  8106

Save the objects in so that we don’t need to run the process again.

saveRDS(A375_filt, file = "A375_filt.rds")
saveRDS(A375_treated_filt, file = "A375_treated_filt.rds")

Integrate the data

For samples prepared at different time frames at different kits, we can integrate using various packages to remove the batch or biases. First, I am going to use seurat integrate package which uses Canonical correlation analysis (CCA). The method selects features that are repeatedly variable across datasets for integration. Remember to normalize and identify variable features for each dataset independently.

obj.list <- c(A375_filt, A375_treated_filt)
features <- SelectIntegrationFeatures(object.list = obj.list)
anchors <- FindIntegrationAnchors(object.list = obj.list, anchor.features = features)
## Warning in CheckDuplicateCellNames(object.list = object.list): Some cell names
## are duplicated across objects provided. Renaming to enforce unique cell names.
seurat_integrated <- IntegrateData(anchorset = anchors)
DefaultAssay(seurat_integrated)
## [1] "integrated"

NOTE: If you look at your seurat object, the default assay as changed from ‘RNA’ to ‘integrated’ this can be change anytime using the line below this would be the same way you would change between scRNA-seq and scATAC-seq
DefaultAssay(seurat_integrated) <- “RNA”

Now run the standard workflow for visualization and clustering

#seurat_integrated <- ScaleData(seurat_integrated, vars.to.regress = c("nCount_RNA"), verbose = FALSE)
seurat_integrated <- ScaleData(seurat_integrated, verbose = FALSE)
seurat_integrated <- RunPCA(seurat_integrated, npcs = 30, verbose = FALSE)
seurat_integrated <- RunUMAP(seurat_integrated, reduction = "pca", dims = 1:30)
seurat_integrated <- FindNeighbors(seurat_integrated, reduction = "pca", dims = 1:30)
seurat_integrated <- FindClusters(seurat_integrated, resolution = 0.5, 
                                  algorithm = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 17437
## Number of edges: 729156
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8826
## Number of communities: 9
## Elapsed time: 2 seconds
# I prefer using Leiden algorithm (4),  the default is Louvain (1). You need to install it using
#pip install leidenalg

Proportional Plots

Let’s see what proportion of our total cells reside in each cluster

prop.table(table(Idents(seurat_integrated)))
## 
##          0          1          2          3          4          5          6 
## 0.23507484 0.18655732 0.14044847 0.13224752 0.11171646 0.06623846 0.05499799 
##          7          8 
## 0.04318403 0.02953490
## --------------------------------------------------------------------------------------------------
# 1. sample specific
## --------------------------------------------------------------------------------------------------
pt <- table(Idents(seurat_integrated), seurat_integrated$orig.ident)
pt <- as.data.frame(pt)
pt$Var1 <- as.character(pt$Var1)
pt$Var1 <- factor(pt$Var1, level =c("0", "1",  "2"  ,"3" , "4"  ,"5" , "6" , "7"))
                                    

ggplot(pt, aes(x = Var1, y = Freq, fill = Var2)) +
  theme_bw(base_size = 15) +
  geom_col(position = "fill", width = 0.5) +
  xlab("Sample") +
  ylab("Proportion") +
  #scale_fill_manual(values = brewer.pal(12, "Paired")) +
  theme(legend.title = element_blank())

## --------------------------------------------------------------------------------------------------
# 2. cell cycle specific
## --------------------------------------------------------------------------------------------------
# I have created the list of human cell cycle genes from this source:
# "https://raw.githubusercontent.com/hbc/tinyatlas/master/cell_cycle/Homo_sapiens.csv>"
  
cc_genes <- read.csv("https://raw.githubusercontent.com/githubrudramani/Bioinformatics/master/cellcyclegenes.csv")
s_genes <- cc_genes[ cc_genes$phase=="S",]$ext_gene
g2m_genes <- cc_genes[ cc_genes$phase=="G2/M",]$ext_gene
seurat_integrated <- CellCycleScoring(object = seurat_integrated, g2m.features = g2m_genes, 
                              s.features = s_genes)
## Warning: The following features are not present in the object: UBR7, RFC2,
## RAD51, MCM2, TIPIN, MCM6, UNG, POLD3, CDC45, MSH2, MCM5, RAD51AP1, GMNN, RPA2,
## CASP8AP2, NASP, DSCC1, CDCA7, CHAF1B, RRM1, FEN1, PRIM1, UHRF1, not searching
## for symbol synonyms
## Warning: The following features are not present in the object: CBX5, RANGAP1,
## CTCF, PIMREG, ANP32E, LBR, CKS1B, JPT1, not searching for symbol synonyms
table(seurat_integrated$seurat_clusters)
## 
##    0    1    2    3    4    5    6    7    8 
## 4099 3253 2449 2306 1948 1155  959  753  515
pt <- table(Idents(seurat_integrated), seurat_integrated$Phase)
pt <- as.data.frame(pt)
pt$Var1 <- as.character(pt$Var1)
pt$Var1 <- factor(pt$Var1, level =c("0","1",  "2"  ,"3" , "4"  ,"5" , "6" , "7"))

ggplot(pt, aes(x = Var1, y = Freq, fill = Var2)) +
  theme_bw(base_size = 15) +
  geom_col(position = "fill", width = 0.5) +
  xlab("Cluster") +
  ylab("Proportion") +
  #scale_fill_manual(values = brewer.pal(3, "YlOrBr")) +
  theme(legend.title = element_blank())

Let’s see the UMAP and clusters:

DimPlot(seurat_integrated, reduction = "umap", label = TRUE)

DimPlot(seurat_integrated, reduction = "umap", 
        split.by = "orig.ident", # this facets the plot 
        group.by = "Phase", # labels the cells with values from your group.by variable
        label = FALSE)

DimPlot(seurat_integrated, reduction = "umap", 
        split.by = "orig.ident", # this facets the plot 
        group.by = "seurat_clusters", # labels the cells with values from your group.by variable
        label = TRUE)

Finding the marker genes for a cluster.

Seurat has differenct functions like FindMarkers to examine statistical analysis between two clusters, FindConservedMarkers to find conserved genes across samples for a given cluster, and FindAllMarkers iteratevely uses FindMarkers to find markers genes for each cluster vs rest.

cluster.markers <- FindAllMarkers(seurat_integrated, only.pos = TRUE, 
                                  min.pct = 0.25, logfc.threshold = 0.25)
# let's take the top 5 marker genes for each cluster and plot as a heatmap
top5 <- cluster.markers %>% 
  group_by(cluster) %>%
  top_n(n = 5, wt = avg_log2FC)
DoHeatmap(seurat_integrated, features = top5$gene)

# You can extract markers for particular cluster
cluster0 <- cluster.markers %>% filter(cluster == "0")
cluster0$pct.diff <- cluster0$pct.1 - cluster0$pct.2
cluster0 <- cluster0[order(-cluster0$pct.diff),]
head(cluster0)
##               p_val avg_log2FC pct.1 pct.2     p_val_adj cluster  gene pct.diff
## MTDH   0.000000e+00  0.5856496 0.926 0.551  0.000000e+00       0  MTDH    0.375
## SRM   5.931359e-282  0.3237898 0.811 0.443 1.186272e-278       0   SRM    0.368
## TPI1  1.395593e-270  0.3930449 0.923 0.572 2.791186e-267       0  TPI1    0.351
## GUK1  1.509491e-306  0.2657783 0.801 0.451 3.018983e-303       0  GUK1    0.350
## AP2M1 2.439654e-270  0.3274912 0.763 0.413 4.879308e-267       0 AP2M1    0.350
## PFN1   0.000000e+00  0.4384106 0.941 0.597  0.000000e+00       0  PFN1    0.344
# we can plot gene of interest
genes <- c("TPI1", "MTDH")
FeaturePlot(seurat_integrated, 
            reduction = "umap", 
            features = genes,
            pt.size = 0.4, 
            order = TRUE,
            split.by = "orig.ident",
            min.cutoff = 'q10',
            label = FALSE)

VlnPlot(seurat_integrated, features = genes)

# Dot plot of top 2 genes in a cluster
top2 <- cluster.markers %>% 
  group_by(cluster) %>%
  top_n(n = 2, wt = avg_log2FC)

DotPlot(seurat_integrated, features=unique(top2$gene),  dot.scale = 6)  + 
  RotatedAxis() +theme(axis.text.x = element_text(color = "black", size = 10))

I am going to find markers genes across samples, as we do in bulk RNAseq analysis.
seuratDE <- seurat_integrated
Idents(seuratDE) <- "orig.ident"
markersDE <- FindMarkers(object = seuratDE,ident.1 = "A375_treated",
                         ident.2 = "A375",
                         min.pct = 0.25)

topDE <- markersDE[(markersDE$avg_log2FC >0.25) | (markersDE$avg_log2FC < -0.25),]
topDE <- topDE[order(-(topDE$avg_log2FC)),]

head(topDE)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## FXYD3   4.332582e-240  0.3199409 0.380 0.056 8.665164e-237
## RAD51B   5.833668e-81  0.2752425 0.789 0.386  1.166734e-77
## NAV2     1.444413e-51  0.2552213 0.872 0.607  2.888825e-48
## MRPL12   2.290478e-75 -0.2548408 0.584 0.392  4.580957e-72
## ANXA1    2.385762e-08 -0.2559625 0.728 0.507  4.771525e-05
## CARHSP1  2.601973e-52 -0.2567430 0.626 0.464  5.203945e-49
tail(topDE)
##                  p_val avg_log2FC pct.1 pct.2     p_val_adj
## RPS20    3.168520e-135 -0.4553616 0.937 0.879 6.337039e-132
## SH3BGRL3  1.323686e-59 -0.4646193 0.679 0.512  2.647372e-56
## TMSB10   5.504026e-101 -0.4682699 0.908 0.827  1.100805e-97
## UBE2S     5.800565e-52 -0.4703558 0.828 0.682  1.160113e-48
## FTL      7.563389e-137 -0.5330453 0.889 0.808 1.512678e-133
## MT2A      6.952119e-68 -0.6062146 0.876 0.783  1.390424e-64
# Upregulated in treated samples
genes <- c("RAD51B", "FXYD3")
FeaturePlot(seuratDE, 
            reduction = "umap", 
            features = genes,
            pt.size = 0.4, 
            order = TRUE,
            split.by = "orig.ident",
            min.cutoff = 'q10',
            label = FALSE)

# Downregulated in treated samples
genes <- c("RPS20", "TMSB10")
FeaturePlot(seuratDE, 
            reduction = "umap", 
            features = genes,
            pt.size = 0.4, 
            order = TRUE,
            split.by = "orig.ident",
            min.cutoff = 'q10',
            label = FALSE)

genes <- c("RAD51B", "FXYD3", "RPS20", "TMSB10")
DotPlot(seuratDE, features=genes,  dot.scale = 6)  + 
  RotatedAxis() +theme(axis.text.x = element_text(color = "black", size = 10))

saveRDS(seurat_integrated, file = "seurat_integrated_CCA.rds")

Let’s integrate with Harmony

I always prefer Harmony for seurat objects integration, first it is faster than CCA and studies recommend Harmony integration in comparasion to other methods. You can follow this paper for comparative analysis: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1850-9

# First merged the dublet filtered object
merged_filt <- merge(A375_filt, A375_treated_filt)
## Warning in CheckDuplicateCellNames(object.list = objects): Some cell names are
## duplicated across objects provided. Renaming to enforce unique cell names.
table(merged_filt$orig.ident)
## 
##         A375 A375_treated 
##         9331         8106
# Dimension Reduction
merged_filt <- merged_filt %>% 
  Seurat::NormalizeData(verbose = FALSE) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2500) %>% 
  ScaleData(verbose = FALSE, features = rownames(merged_filt) ) %>% 
  RunPCA(npcs = 20, verbose = FALSE)

# Plot befor applying harmony
options(repr.plot.height = 5, repr.plot.width = 12)
p1 <- DimPlot(object = merged_filt, reduction = "pca", pt.size = .1, group.by = "orig.ident")
p2 <- VlnPlot(object = merged_filt, features = "PC_1", group.by = "orig.ident", pt.size = .1)
plot(p1)

plot(p1)

# Apply Harmony Integration:
seurat.HM <- merged_filt %>%
  RunHarmony("orig.ident", plot_convergence = TRUE)
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details
## on syntax validity

DimPlot(object = seurat.HM, reduction = "harmony", pt.size = .1, group.by = "orig.ident")

# Apply standard workflow for UMAP and clustering 
seurat.HM <- seurat.HM %>% 
  RunUMAP(reduction = "harmony", dims = 1:20) %>% 
  FindNeighbors(reduction = "harmony", dims = 1:20) %>% 
  FindClusters(resolution = 0.5) %>% 
  identity()
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 17437
## Number of edges: 584179
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8724
## Number of communities: 8
## Elapsed time: 2 seconds
DimPlot(seurat.HM, reduction = "umap", 
        split.by = "orig.ident", # this facets the plot 
        group.by = "seurat_clusters", # labels the cells with values from your group.by variable
        label = TRUE)

##---------------------------------------------------------------------------
# Let's do same analysis as we did above for CCA integrated object
##---------------------------------------------------------------------------------

# Finding the marker genes for a cluster vs rest
cluster.markersHM <- FindAllMarkers(seurat.HM, only.pos = TRUE, 
                                  min.pct = 0.25, logfc.threshold = 0.25)
                                  
# let's take the top 5 marker genes for each cluster and plot as a heatmap
top5 <- cluster.markersHM %>% 
  group_by(cluster) %>%
  top_n(n = 5, wt = avg_log2FC)
DoHeatmap(seurat.HM, features = top5$gene)

# You can extract markers for particular cluster
cluster0HM <- cluster.markersHM %>% filter(cluster == "0")
cluster0HM$pct.diff <- cluster0HM$pct.1 - cluster0HM$pct.2
cluster0HM <- cluster0HM[order(-cluster0HM$pct.diff),]
head(cluster0)
##               p_val avg_log2FC pct.1 pct.2     p_val_adj cluster  gene pct.diff
## MTDH   0.000000e+00  0.5856496 0.926 0.551  0.000000e+00       0  MTDH    0.375
## SRM   5.931359e-282  0.3237898 0.811 0.443 1.186272e-278       0   SRM    0.368
## TPI1  1.395593e-270  0.3930449 0.923 0.572 2.791186e-267       0  TPI1    0.351
## GUK1  1.509491e-306  0.2657783 0.801 0.451 3.018983e-303       0  GUK1    0.350
## AP2M1 2.439654e-270  0.3274912 0.763 0.413 4.879308e-267       0 AP2M1    0.350
## PFN1   0.000000e+00  0.4384106 0.941 0.597  0.000000e+00       0  PFN1    0.344
head(cluster0HM)
##           p_val avg_log2FC pct.1 pct.2 p_val_adj cluster      gene pct.diff
## KCNQ5         0   1.190401 0.747 0.420         0       0     KCNQ5    0.327
## SOX5          0   1.172482 0.852 0.527         0       0      SOX5    0.325
## PDE4D         0   1.139245 0.808 0.490         0       0     PDE4D    0.318
## LRMDA         0   1.081837 0.875 0.575         0       0     LRMDA    0.300
## IMMP2L        0   1.261109 0.952 0.652         0       0    IMMP2L    0.300
## LINC01320     0   1.306881 0.960 0.664         0       0 LINC01320    0.296
# we can plot gene of interest
genes <- c("KCNQ5", "SOX5")
FeaturePlot(seurat.HM, 
            reduction = "umap", 
            features = genes,
            pt.size = 0.4, 
            order = TRUE,
            split.by = "orig.ident",
            min.cutoff = 'q10',
            label = FALSE)

VlnPlot(seurat_integrated, features = genes)

# Dot plot of top 2 genes in a cluster
top2 <- cluster.markersHM %>% 
  group_by(cluster) %>%
  top_n(n = 2, wt = avg_log2FC)

DotPlot(seurat.HM, features=unique(top2$gene),  dot.scale = 6)  + 
  RotatedAxis() +theme(axis.text.x = element_text(color = "black", size = 10))

## DE markers between two samples 
seuratDE <- seurat.HM
Idents(seuratDE) <- "orig.ident"
markersDE <- FindMarkers(object = seuratDE,ident.1 = "A375_treated",
                         ident.2 = "A375",
                         min.pct = 0.25)

topDE <- markersDE[(markersDE$avg_log2FC >0.25) | (markersDE$avg_log2FC < -0.25),]
topDE <- topDE[order(-(topDE$avg_log2FC)),]

head(topDE)
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
## NLGN1   0.000000e+00  1.2194070 0.692 0.380  0.000000e+00
## FXYD3   0.000000e+00  1.1935889 0.392 0.056  0.000000e+00
## IMMP2L 5.713169e-287  0.7962357 0.811 0.645 1.309001e-282
## HMGA2   0.000000e+00  0.7888583 0.649 0.386  0.000000e+00
## FN1    5.064772e-283  0.7857883 0.710 0.486 1.160440e-278
## SOX5   4.681670e-267  0.7775778 0.714 0.507 1.072664e-262
tail(topDE)
##                   p_val avg_log2FC pct.1 pct.2     p_val_adj
## ACTG1      0.000000e+00 -0.7105564 0.942 0.943  0.000000e+00
## LINC01918 1.511004e-106 -0.7269357 0.344 0.463 3.462012e-102
## APOE      3.880942e-162 -0.7566181 0.323 0.491 8.892014e-158
## COX5B      0.000000e+00 -0.8267984 0.802 0.862  0.000000e+00
## CAPG       0.000000e+00 -0.8386470 0.748 0.810  0.000000e+00
## ERCC6L2    0.000000e+00 -1.5870124 0.201 0.577  0.000000e+00
# Upregulated in treated samples
genes <- c("NLGN1", "FXYD3")
FeaturePlot(seuratDE, 
            reduction = "umap", 
            features = genes,
            pt.size = 0.4, 
            order = TRUE,
            split.by = "orig.ident",
            min.cutoff = 'q10',
            label = FALSE)

# Downregulated in treated samples
genes <- c("ACTG1", "LINC01918")
FeaturePlot(seuratDE, 
            reduction = "umap", 
            features = genes,
            pt.size = 0.4, 
            order = TRUE,
            split.by = "orig.ident",
            min.cutoff = 'q10',
            label = FALSE)

genes <- c("NLGN1", "FXYD3", "ACTG1", "LINC01918")
DotPlot(seuratDE, features=genes,  dot.scale = 6)  + 
  RotatedAxis() +theme(axis.text.x = element_text(color = "black", size = 10))

I find Harmony Integration gives better overall result for finding clusters and markers.

Assigning identity to cell clusters

1. Use SingleR package

Tutorial:
https://bioconductor.org/packages/release/bioc/vignettes/SingleR/inst/doc/SingleR.html

# First install the required packages
#BiocManager::install(c("scater", "scran","DropletUtils","SingleR","celldex" ))
#devtools::install_github("Irrationone/cellassign")
library(scater) #quality control and visualization for scRNA-seq data
library(scran) #for low level processing of scRNA-seq data
library(DropletUtils)
library(tensorflow)
library(cellassign) 
library(SingleR) #automated cell type annotation ('label transfer') using reference data
library(celldex) #a large collection of reference expression datasets with curated cell type labels for use with SingleR package
library(pheatmap)

# convert to single cell experiment
seurat.HM.sce <- as.SingleCellExperiment(seurat.HM)

# I am using Human Cell Atlas data as reference. You can play around with many data source given in the package.

ref <- HumanPrimaryCellAtlasData()
predictions <- SingleR(test=seurat.HM.sce, assay.type.test=1, 
                       ref=ref, labels=ref$label.main)

plotScoreHeatmap(predictions)

# Add back to singleCellExperiment object (or Seurat objects)
seurat.HM.sce[["SingleR.labels"]] <- predictions$labels
plotUMAP(seurat.HM.sce, colour_by = "SingleR.labels")

seurat.HM2 <- as.Seurat(seurat.HM.sce, counts = NULL)
DimPlot(seurat.HM2, reduction = "UMAP", 
        split.by = "orig.ident", # this facets the plot 
        group.by = "SingleR.labels", # labels the cells with values from your group.by variable
        label = TRUE)


2. Using sc-type

Tutorial:
https://github.com/IanevskiAleksandr/sc-type

# load libraries and functions
#install.packages("HGNChelper")
lapply(c("dplyr","Seurat","HGNChelper"), library, character.only = T)
## [[1]]
##  [1] "pheatmap"             "celldex"              "SingleR"             
##  [4] "cellassign"           "tensorflow"           "DropletUtils"        
##  [7] "scran"                "scater"               "scuttle"             
## [10] "KernSmooth"           "fields"               "viridis"             
## [13] "viridisLite"          "spam"                 "harmony"             
## [16] "Rcpp"                 "SingleCellExperiment" "SummarizedExperiment"
## [19] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [22] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [25] "stats4"               "MatrixGenerics"       "matrixStats"         
## [28] "cowplot"              "DoubletFinder"        "dplyr"               
## [31] "ggplot2"              "SeuratObject"         "Seurat"              
## [34] "stats"                "graphics"             "grDevices"           
## [37] "utils"                "datasets"             "methods"             
## [40] "base"                
## 
## [[2]]
##  [1] "pheatmap"             "celldex"              "SingleR"             
##  [4] "cellassign"           "tensorflow"           "DropletUtils"        
##  [7] "scran"                "scater"               "scuttle"             
## [10] "KernSmooth"           "fields"               "viridis"             
## [13] "viridisLite"          "spam"                 "harmony"             
## [16] "Rcpp"                 "SingleCellExperiment" "SummarizedExperiment"
## [19] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [22] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [25] "stats4"               "MatrixGenerics"       "matrixStats"         
## [28] "cowplot"              "DoubletFinder"        "dplyr"               
## [31] "ggplot2"              "SeuratObject"         "Seurat"              
## [34] "stats"                "graphics"             "grDevices"           
## [37] "utils"                "datasets"             "methods"             
## [40] "base"                
## 
## [[3]]
##  [1] "HGNChelper"           "pheatmap"             "celldex"             
##  [4] "SingleR"              "cellassign"           "tensorflow"          
##  [7] "DropletUtils"         "scran"                "scater"              
## [10] "scuttle"              "KernSmooth"           "fields"              
## [13] "viridis"              "viridisLite"          "spam"                
## [16] "harmony"              "Rcpp"                 "SingleCellExperiment"
## [19] "SummarizedExperiment" "Biobase"              "GenomicRanges"       
## [22] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [25] "BiocGenerics"         "stats4"               "MatrixGenerics"      
## [28] "matrixStats"          "cowplot"              "DoubletFinder"       
## [31] "dplyr"                "ggplot2"              "SeuratObject"        
## [34] "Seurat"               "stats"                "graphics"            
## [37] "grDevices"            "utils"                "datasets"            
## [40] "methods"              "base"
# load gene set preparation function
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/gene_sets_prepare.R")
# load cell type annotation function
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/sctype_score_.R")

# DB file
db_ = "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx";
tissue = "Brain" # e.g. Immune system, Liver, Pancreas, Kidney, Eye, Brain

# prepare gene sets
gs_list = gene_sets_prepare(db_, tissue)
DefaultAssay(seurat.HM)
## [1] "RNA"
# get cell-type by cell matrix
es.max = sctype_score(scRNAseqData = seurat.HM[["RNA"]]@scale.data, scaled = TRUE, 
                      gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)
# merge by cluster
cL_resutls = do.call("rbind", lapply(unique(seurat.HM@meta.data$seurat_clusters), function(cl){
  es.max.cl = sort(rowSums(es.max[ ,rownames(seurat.HM@meta.data[seurat.HM@meta.data$seurat_clusters==cl, ])]), decreasing = !0)
  head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(seurat.HM@meta.data$seurat_clusters==cl)), 10)
}))

sctype_scores = cL_resutls %>% group_by(cluster) %>% top_n(n = 1, wt = scores) 

# set low-confident (low ScType score) clusters to "unknown"
sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
print(sctype_scores[,1:3])
## # A tibble: 8 × 3
## # Groups:   cluster [8]
##   cluster type                     scores
##   <fct>   <chr>                     <dbl>
## 1 3       Immature neurons         525.  
## 2 7       Immature neurons         440.  
## 3 2       Unknown                  -60.9 
## 4 6       Unknown                   -9.16
## 5 1       Cancer cells            1274.  
## 6 0       Neural Progenitor cells 1025.  
## 7 5       Immature neurons         647.  
## 8 4       Endothelial cells        812.
# Overlay the identified cell types on UMAP plot:
seurat.HM@meta.data$customclassif = ""
for(j in unique(sctype_scores$cluster)){
  cl_type = sctype_scores[sctype_scores$cluster==j,]; 
  seurat.HM@meta.data$customclassif[seurat.HM@meta.data$seurat_clusters == j] = as.character(cl_type$type[1])
}

DimPlot(seurat.HM, reduction = "umap", label = TRUE, repel = TRUE, 
        group.by = 'customclassif') 

Plot module scores for genes of interest

I am going to show some examples how you can plot the module scores for your gene of interest. I am interested in cancer related EMT and Human Angiogenesis genes (ANG)

EMT <- list(c('ABLIM1', 'CDH1', 'COL1A1', 'COL1A2', 'COL3A1', 'COL5A1', 'COL5A2',
              'COL6A1', 'COL6A2', 'COL6A3', 'DSP', 'FN1', 'FYN', 'GNAI2', 'JUP',
              'LAMA4', 'PDGFRB', 'SERPINB5', 'ZEB2', 'CDH2', 'COL1A1', 'COL3A1',
              'COL5A1', 'COL6A1', 'COL6A2', 'EPHA1', 'ERBB3', 'FN1',
              'FYN', 'GNAI2', 'LAMA4', 'VIM'))

ANG <- list(c('AKT1', 'ANGPT1', 'ANGPT2', 'ANGPTL3', 'ANGPTL4', 'ANPEP',
              'ADGRB1', 'CCL11', 'CCL2', 'CDH5', 'COL18A1', 'COL4A3', 'CXCL1',
              'CXCL10', 'CXCL3', 'CXCL5', 'CXCL6', 'CXCL9', 'TYMP', 'S1PR1',
              'EFNA1', 'EFNA3', 'EFNB2', 'EGF', 'ENG', 'EPHB4', 'EREG', 'FGF1',
              'FGF2', 'FGFR3', 'FIGF', 'FLT1', 'HAND2', 'HGF', 'HIF1A', 'HPSE',
              'ID1', 'ID3', 'IFNA1', 'IFNB1', 'IFNG', 'IGF1', 'IL1B', 'IL6',
              'CXCL8', 'ITGAV', 'ITGB3', 'JAG1', 'KDR', 'LAMA5', 'LECT1', 'LEP',
              'MDK', 'MMP2', 'MMP9', 'NOTCH4', 'NRP1', 'NRP2', 'PDGFA', 'PECAM1',
              'PF4', 'PGF', 'PLAU', 'PLG', 'PLXDC1', 'PROK2', 'PTGS1',
              'SERPINF1', 'SPHK1', 'STAB1', 'TEK', 'TGFA', 'TGFB1', 'TGFB2',
              'TGFBR1', 'THBS1', 'THBS2', 'TIMP1', 'TIMP2', 'TIMP3', 'TNF',
              'TNFAIP2', 'VEGFA', 'VEGFC', 'B2M', 'HPRT1', 'RPL13A', 'GAPDH',
              'ACTB', 'HGDC', 'RTC', 'RTC', 'RTC', 'PPC', 'PPC', 'PPC'))


gene_list <- list(EMT,  ANG)
gene_class <- c("EMT",  "ANG")
names(gene_list) <- gene_class

# Add module score to seurat meta data
for (i in gene_class) {
  seurat.HM <- AddModuleScore(
    object = seurat.HM,
    features = gene_list[i],
    ctrl = 20,
    name = i
  )}
## Warning: The following features are not present in the object: c("ABLIM1",
## "CDH1", "COL1A1", "COL1A2", "COL3A1", "COL5A1", "COL5A2", "COL6A1", "COL6A2",
## "COL6A3", "DSP", "FN1", "FYN", "GNAI2", "JUP", "LAMA4", "PDGFRB", "SERPINB5",
## "ZEB2", "CDH2", "COL1A1", "COL3A1", "COL5A1", "COL6A1", "COL6A2", "EPHA1",
## "ERBB3", "FN1", "FYN", "GNAI2", "LAMA4", "VIM"), not searching for symbol
## synonyms
## Warning in AddModuleScore(object = seurat.HM, features = gene_list[i], ctrl =
## 20, : Could not find enough features in the object from the following feature
## lists: EMT Attempting to match case...
## Warning in grep(pattern = paste0("^", s, "$"), x = match, ignore.case = TRUE, :
## argument 'pattern' has length > 1 and only the first element will be used
## Warning: The following features are not present in the object: c("AKT1", "ANGPT1", "ANGPT2", "ANGPTL3", "ANGPTL4", "ANPEP", "ADGRB1", "CCL11", "CCL2", "CDH5", "COL18A1", "COL4A3", "CXCL1", "CXCL10", "CXCL3", "CXCL5", "CXCL6", "CXCL9", "TYMP", "S1PR1", "EFNA1", "EFNA3", "EFNB2", "EGF", "ENG", "EPHB4", "EREG", "FGF1", "FGF2", "FGFR3", "FIGF", "FLT1", "HAND2", "HGF", "HIF1A", "HPSE", "ID1", "ID3", "IFNA1", "IFNB1", "IFNG", "IGF1", "IL1B", "IL6", "CXCL8", "ITGAV", "ITGB3", "JAG1", "KDR", "LAMA5", "LECT1", "LEP", "MDK", "MMP2", "MMP9", "NOTCH4", "NRP1", "NRP2", "PDGFA", 
## "PECAM1", "PF4", "PGF", "PLAU", "PLG", "PLXDC1", "PROK2", "PTGS1", "SERPINF1", "SPHK1", "STAB1", "TEK", "TGFA", "TGFB1", "TGFB2", "TGFBR1", "THBS1", "THBS2", "TIMP1", "TIMP2", "TIMP3", "TNF", "TNFAIP2", "VEGFA", "VEGFC", "B2M", "HPRT1", "RPL13A", "GAPDH", "ACTB", "HGDC", "RTC", "RTC", "RTC", "PPC", "PPC", "PPC"), not searching for symbol synonyms
## Warning in AddModuleScore(object = seurat.HM, features = gene_list[i], ctrl =
## 20, : Could not find enough features in the object from the following feature
## lists: ANG Attempting to match case...
## Warning in grep(pattern = paste0("^", s, "$"), x = match, ignore.case = TRUE, :
## argument 'pattern' has length > 1 and only the first element will be used
# Plot Module score
data <- seurat.HM@meta.data[,c("orig.ident","seurat_clusters",
                                 "EMT1","ANG1")]
head(data)
##                      orig.ident seurat_clusters        EMT1        ANG1
## AAACCCAAGGTTCTTG-1_1       A375               3  0.00000000 -0.13509859
## AAACCCAGTGACAGCA-1_1       A375               7 -0.08659321 -0.08659321
## AAACCCAGTGTTTACG-1_1       A375               7 -0.06600254 -0.13200509
## AAACCCAGTTTGTTGG-1_1       A375               3  0.00000000  0.00000000
## AAACCCATCATTGTGG-1_1       A375               3 -0.09301711  0.00000000
## AAACCCATCCATTCAT-1_1       A375               2 -0.14821815  0.00000000
gene_name<- c( "EMT Genes","Angiogenesis Genes")

# Change a (position of gene in  gene_name) or you can use a for loop and save corresponding figure using ggsave
a = 1
i = colnames(data)[a+2]

#Cluster plot
df <- data[, c("orig.ident","seurat_clusters", i)]
colnames(df) <- c("orig.ident","seurat_clusters", "y")
ggplot(df, aes(x=seurat_clusters, y=y, fill =orig.ident )) + theme_classic() +
    geom_boxplot() +
    ggtitle(gene_name[a])+
    theme(axis.text.x = element_text(size = 15, face = "bold", angle = 0, vjust = 0.6, hjust=0.5),
          axis.text.y = element_text(size = 15, face = "bold", angle = 0, vjust = 0.5, hjust=0)
          ,axis.title.x=element_text(size=18,face="bold", vjust = 0.5 )
          ,axis.title.y=element_text(size=18,face="bold", hjust = 0.5, vjust = 1.5 ),
          plot.title = element_text(size = 20, face = "bold"),
          legend.title=element_text(size=14, face = "bold"))+
    stat_summary(fun=mean, geom="point", shape=11, size=1) +
    xlab("Clusters") +
    ylab("Module Score") +
    labs(fill = "Sample") +
    theme(plot.title = element_text(hjust = 0.5))

  #ggsave(paste0("Gene_annotations/",gene_name[a],"_cluster.pdf"), scale = 0.9)
  
# Average plot across samples
df <- data[, c("orig.ident","seurat_clusters", i)]
colnames(df) <- c("orig.ident","seurat_clusters", "y")
ggplot(df, aes(x=orig.ident, y=y, fill =orig.ident )) + theme_classic() +
    geom_boxplot() +
    ggtitle(gene_name[a])+
    theme(axis.text.x = element_text(size = 15, face = "bold", angle = 75, vjust = 0.55, hjust=0.55),
          axis.text.y = element_text(size = 15, face = "bold", angle = 0, vjust = 0.5, hjust=0)
          ,axis.title.x=element_text(size=18,face="bold", vjust = 0.5 )
          ,axis.title.y=element_text(size=18,face="bold", hjust = 0.5, vjust = 1.5 ),
          plot.title = element_text(size = 20, face = "bold"),
          legend.title=element_text(size=14, face = "bold"))+
    stat_summary(fun=mean, geom="point", shape=11, size=1) +
    xlab("Samples") +
    ylab("Module Score") +
    labs(fill = "Sample") +
    theme(plot.title = element_text(hjust = 0.5))

Trajectory Pseudo time Using Monocle 3 :

https://cole-trapnell-lab.github.io/monocle3/

# Install depencies
# BiocManager::install(c('BiocGenerics', 'DelayedArray', 'DelayedMatrixStats',
#                         'limma', 'S4Vectors', 'SingleCellExperiment',
#                        'SummarizedExperiment', 'batchelor', 'Matrix.utils'))
# install.packages("devtools")
# devtools::install_github('cole-trapnell-lab/leidenbase')
# It doest not work in my M1 mac, hence I first downloaded gfortran from
# https://github.com/fxcoudert/gfortran-for-macOS/releases, I used the intel version gfortran-Intel-11.3-Monterey.dmg
# You need to install ROSETTA to intstall intel version on M1 apple (https://machow2.com/rosetta-mac/)

# devtools::install_github('cole-trapnell-lab/monocle3')
# devtools::install_github('satijalab/seurat-data')
# remotes::install_github('satijalab/seurat-wrappers')

library(monocle3)  
library(SeuratData)
library(SeuratWrappers)
library(ggplot2)
library(patchwork)
library(magrittr)
library(scater)
library(SingleCellExperiment)

# Convert Seurat Object to sce object
seurat.HM.sce <- as.SingleCellExperiment(seurat.HM)
cds <- as.cell_data_set(DietSeurat(seurat.HM, graphs = "umap"))
cds <- cluster_cells(cds)
cds <- learn_graph(cds)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
## Warning in igraph::graph.dfs(stree_ori, root = root_cell, neimode = "all", :
## Argument `neimode' is deprecated; use `mode' instead
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
## Warning in igraph::graph.dfs(stree_ori, root = root_cell, neimode = "all", :
## Argument `neimode' is deprecated; use `mode' instead
plot_cells(cds, color_cells_by = "partition")
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

# For interactive Rshiny please uncomment this two line

#bcds <- order_cells(cds, reduction_method = "UMAP")

# plot_cells(cds, color_cells_by = "pseudotime", label_cell_groups=FALSE, label_leaves=FALSE,
#      label_branch_points=TRUE,group_label_size = 3, labels_per_group = 3, graph_label_size = 3)

        
#-------------------------------------------------------------------------------------
# Using endothelial cells as root cell.
#-------------------------------------------------------------------------------------
DimPlot(seurat.HM, reduction = "umap", label = TRUE, repel = TRUE, 
        group.by = 'customclassif') 

end_cells <- rownames(seurat.HM@meta.data[seurat.HM@meta.data$customclassif=="Endothelial cells",])
cds <- order_cells(cds, reduction_method = "UMAP",root_cells = end_cells )
plot_cells(cds,
           color_cells_by = "pseudotime",
           label_cell_groups=FALSE,
           label_leaves=FALSE,
           label_branch_points=TRUE,           
           group_label_size = 1,
           labels_per_group = 1,
           graph_label_size = 1,
)

That’s the end of this tutorial.